[Hexagon] Implement isUsedByReturnOnly #167637
@llvm/pr-subscribers-backend-hexagon

Author: Sudharsan Veeravalli (svs-quic)

Changes: Prior to this patch, libcalls inserted by the SelectionDAG legalizer could never be tail called.

Full diff: https://github.com/llvm/llvm-project/pull/167637.diff

10 Files Affected:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 04a97606cb7f8..894a07e6b68c2 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3958,3 +3958,41 @@ bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
return false;
return Mask->getValue().isPowerOf2();
}
+
+// Check if the result of the node is only used as a return value, as
+// otherwise we can't perform a tail-call.
+bool HexagonTargetLowering::isUsedByReturnOnly(SDNode *N,
+                                               SDValue &Chain) const {
+  if (N->getNumValues() != 1)
+    return false;
+  if (!N->hasNUsesOfValue(1, 0))
+    return false;
+
+  SDNode *Copy = *N->user_begin();
+
+  if (Copy->getOpcode() == ISD::BITCAST) {
+    return isUsedByReturnOnly(Copy, Chain);
+  }
+
+  if (Copy->getOpcode() != ISD::CopyToReg) {
+    return false;
+  }
+
+  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
+  // isn't safe to perform a tail call.
+  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
+    return false;
+
+  // The copy must be used by a HexagonISD::RET_GLUE, and nothing else.
+  bool HasRet = false;
+  for (SDNode *Node : Copy->users()) {
+    if (Node->getOpcode() != HexagonISD::RET_GLUE)
+      return false;
+    HasRet = true;
+  }
+  if (!HasRet)
+    return false;
+
+  Chain = Copy->getOperand(0);
+  return true;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 4ac3e7671592a..f4d2a79051c10 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -162,6 +162,8 @@ class HexagonTargetLowering : public TargetLowering {
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+
/// Return true if an FMA operation is faster than a pair of mul and add
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll b/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
index 6bc60132d3e6a..831ab0a980368 100644
--- a/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
+++ b/llvm/test/CodeGen/Hexagon/fast-math-libcalls.ll
@@ -9,15 +9,8 @@ define float @fast_sqrt_f32(float %x) {
; CHECK-LABEL: fast_sqrt_f32:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_fast2_sqrtf
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_fast2_sqrtf
; CHECK-NEXT: }
%result = call nnan ninf nsz afn float @llvm.sqrt.f32(float %x)
ret float %result
@@ -27,15 +20,8 @@ define double @fast_sqrt_f64(double %x) {
; CHECK-LABEL: fast_sqrt_f64:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
-; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_fast2_sqrtdf2
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_fast2_sqrtdf2
; CHECK-NEXT: }
%result = call nnan ninf nsz afn double @llvm.sqrt.f64(double %x)
ret double %result
@@ -61,15 +47,8 @@ define double @fast_add_f64(double %x, double %y) {
; CHECK-LABEL: fast_add_f64:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
-; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_fast_adddf3
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_fast_adddf3
; CHECK-NEXT: }
%result = fadd nnan ninf nsz afn double %x, %y
ret double %result
@@ -95,15 +74,8 @@ define double @fast_sub_f64(double %x, double %y) {
; CHECK-LABEL: fast_sub_f64:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_fast_subdf3
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_fast_subdf3
; CHECK-NEXT: }
%result = fsub nnan ninf nsz afn double %x, %y
ret double %result
@@ -129,15 +101,8 @@ define double @fast_mul_f64(double %x, double %y) {
; CHECK-LABEL: fast_mul_f64:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_fast_muldf3
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_fast_muldf3
; CHECK-NEXT: }
%result = fmul nnan ninf nsz afn double %x, %y
ret double %result
@@ -194,15 +159,8 @@ define double @fast_div_f64(double %x, double %y) {
; CHECK-LABEL: fast_div_f64:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
-; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_fast_divdf3
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_fast_divdf3
; CHECK-NEXT: }
%result = fdiv nnan ninf nsz afn double %x, %y
ret double %result
@@ -217,15 +175,8 @@ define float @sqrt_f32__afn(float %x) {
; CHECK-LABEL: sqrt_f32__afn:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
-; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_sqrtf
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_sqrtf
; CHECK-NEXT: }
%result = call afn float @llvm.sqrt.f32(float %x)
ret float %result
@@ -235,15 +186,8 @@ define float @sqrt_f32__afn_ninf(float %x) {
; CHECK-LABEL: sqrt_f32__afn_ninf:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_sqrtf
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_sqrtf
; CHECK-NEXT: }
%result = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %result
@@ -253,15 +197,8 @@ define float @sqrt_f32__afn_nnan(float %x) {
; CHECK-LABEL: sqrt_f32__afn_nnan:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_sqrtf
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_sqrtf
; CHECK-NEXT: }
%result = call afn nnan float @llvm.sqrt.f32(float %x)
ret float %result
@@ -271,15 +208,8 @@ define float @sqrt_f32__nnan(float %x) {
; CHECK-LABEL: sqrt_f32__nnan:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
-; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_sqrtf
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_sqrtf
; CHECK-NEXT: }
%result = call nnan float @llvm.sqrt.f32(float %x)
ret float %result
@@ -289,15 +219,8 @@ define float @sqrt_f32_nnan_ninf_afn(float %x) {
; CHECK-LABEL: sqrt_f32_nnan_ninf_afn:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: .cfi_def_cfa r30, 8
-; CHECK-NEXT: .cfi_offset r31, -4
-; CHECK-NEXT: .cfi_offset r30, -8
-; CHECK-NEXT: {
-; CHECK-NEXT: call __hexagon_sqrtf
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump __hexagon_sqrtf
; CHECK-NEXT: }
%result = call nnan ninf afn float @llvm.sqrt.f32(float %x)
ret float %result
diff --git a/llvm/test/CodeGen/Hexagon/fminmax-v67.ll b/llvm/test/CodeGen/Hexagon/fminmax-v67.ll
index ba4fcb5afdba3..8ce34210c38cf 100644
--- a/llvm/test/CodeGen/Hexagon/fminmax-v67.ll
+++ b/llvm/test/CodeGen/Hexagon/fminmax-v67.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: t1
-; CHECK: call fmax
+; CHECK: jump fmax
define dso_local double @t1(double %a, double %b) local_unnamed_addr {
entry:
@@ -11,7 +11,7 @@ entry:
}
; CHECK-LABEL: t2
-; CHECK: call fmin
+; CHECK: jump fmin
define dso_local double @t2(double %a, double %b) local_unnamed_addr {
entry:
@@ -20,7 +20,7 @@ entry:
}
; CHECK-LABEL: t3
-; CHECK: call fmaxf
+; CHECK: jump fmaxf
define dso_local float @t3(float %a, float %b) local_unnamed_addr {
entry:
@@ -29,7 +29,7 @@ entry:
}
; CHECK-LABEL: t4
-; CHECK: call fminf
+; CHECK: jump fminf
define dso_local float @t4(float %a, float %b) local_unnamed_addr {
entry:
diff --git a/llvm/test/CodeGen/Hexagon/fminmax.ll b/llvm/test/CodeGen/Hexagon/fminmax.ll
index 2aae79e6b9bf3..e134168aefdfd 100644
--- a/llvm/test/CodeGen/Hexagon/fminmax.ll
+++ b/llvm/test/CodeGen/Hexagon/fminmax.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i
target triple = "hexagon"
; CHECK-LABEL: cfminf
-; CHECK: call fminf
+; CHECK: jump fminf
define float @cfminf(float %x, float %y) #0 {
entry:
%call = tail call float @fminf(float %x, float %y) #1
@@ -12,7 +12,7 @@ entry:
}
; CHECK-LABEL: cfmaxf
-; CHECK: call fmaxf
+; CHECK: jump fmaxf
define float @cfmaxf(float %x, float %y) #0 {
entry:
%call = tail call float @fmaxf(float %x, float %y) #1
@@ -20,7 +20,7 @@ entry:
}
; CHECK-LABEL: minnum
-; CHECK: call fminf
+; CHECK: jump fminf
define float @minnum(float %x, float %y) #0 {
entry:
%call = tail call float @llvm.minnum.f32(float %x, float %y) #1
@@ -28,7 +28,7 @@ entry:
}
; CHECK-LABEL: maxnum
-; CHECK: call fmaxf
+; CHECK: jump fmaxf
define float @maxnum(float %x, float %y) #0 {
entry:
%call = tail call float @llvm.maxnum.f32(float %x, float %y) #1
diff --git a/llvm/test/CodeGen/Hexagon/fp16.ll b/llvm/test/CodeGen/Hexagon/fp16.ll
index 2f933c92e42b8..40211f2a1a656 100644
--- a/llvm/test/CodeGen/Hexagon/fp16.ll
+++ b/llvm/test/CodeGen/Hexagon/fp16.ll
@@ -13,7 +13,7 @@
; Validate that we generate correct lib calls to convert fp16
;CHECK-LABEL: @test1
-;CHECK: call __extendhfsf2
+;CHECK: jump __extendhfsf2
;CHECK: r0 = memuh
define dso_local float @test1(ptr nocapture readonly %a) local_unnamed_addr #0 {
entry:
diff --git a/llvm/test/CodeGen/Hexagon/inline-division-space.ll b/llvm/test/CodeGen/Hexagon/inline-division-space.ll
index c1937600d47bf..711a00bb9de5b 100644
--- a/llvm/test/CodeGen/Hexagon/inline-division-space.ll
+++ b/llvm/test/CodeGen/Hexagon/inline-division-space.ll
@@ -14,7 +14,7 @@ entry:
; Function Attrs: optsize
define dso_local float @testFloat(float %a, float %b) local_unnamed_addr #0 {
entry:
-;CHECK: call __hexagon_divsf3
+;CHECK: jump __hexagon_divsf3
%div = fdiv float %a, %b
ret float %div
}
@@ -22,7 +22,7 @@ entry:
; Function Attrs: optsize
define dso_local double @testDouble(double %a, double %b) local_unnamed_addr #0 {
entry:
-;CHECK: call __hexagon_divdf3
+;CHECK: jump __hexagon_divdf3
%div = fdiv double %a, %b
ret double %div
}
diff --git a/llvm/test/CodeGen/Hexagon/inline-division.ll b/llvm/test/CodeGen/Hexagon/inline-division.ll
index 5eb97a002b0f4..b1b5fde53b3c6 100644
--- a/llvm/test/CodeGen/Hexagon/inline-division.ll
+++ b/llvm/test/CodeGen/Hexagon/inline-division.ll
@@ -23,7 +23,7 @@ entry:
define dso_local double @testDouble(double %a, double %b) local_unnamed_addr {
entry:
-;CHECK: call __hexagon_divdf3
+;CHECK: jump __hexagon_divdf3
%div = fdiv double %a, %b
ret double %div
}
diff --git a/llvm/test/CodeGen/Hexagon/libcall_tail.ll b/llvm/test/CodeGen/Hexagon/libcall_tail.ll
new file mode 100644
index 0000000000000..2ea95abe8055a
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/libcall_tail.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; Test that libcalls used only by return are tail called.
+; This tests non-float libcalls
+; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @udiv(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: udiv:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: jump __hexagon_udivsi3
+; CHECK-NEXT: }
+ %1 = udiv i32 %a, %b
+ ret i32 %1
+}
+
+define i32 @udivconstby(i32 %a) nounwind {
+; CHECK-LABEL: udivconstby:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r0,#10)
+; CHECK-NEXT: jump __hexagon_udivsi3
+; CHECK-NEXT: }
+ %1 = udiv i32 10, %a
+ ret i32 %1
+}
+
+define i32 @sdiv(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: sdiv:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: jump __hexagon_divsi3
+; CHECK-NEXT: }
+ %1 = sdiv i32 %a, %b
+ ret i32 %1
+}
+
+define i32 @sdivconstby(i32 %a) nounwind {
+; CHECK-LABEL: sdivconstby:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r0,#10)
+; CHECK-NEXT: jump __hexagon_divsi3
+; CHECK-NEXT: }
+ %1 = sdiv i32 10, %a
+ ret i32 %1
+}
+
+define i32 @urem(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: urem:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: jump __hexagon_umodsi3
+; CHECK-NEXT: }
+ %1 = urem i32 %a, %b
+ ret i32 %1
+}
+
+define i32 @uremconstby(i32 %a) nounwind {
+; CHECK-LABEL: uremconstby:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r0,#10)
+; CHECK-NEXT: jump __hexagon_umodsi3
+; CHECK-NEXT: }
+ %1 = urem i32 10, %a
+ ret i32 %1
+}
+
+define i32 @srem(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: srem:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: jump __hexagon_modsi3
+; CHECK-NEXT: }
+ %1 = srem i32 %a, %b
+ ret i32 %1
+}
+
+define i32 @sremconstby(i32 %a) nounwind {
+; CHECK-LABEL: sremconstby:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r0,#10)
+; CHECK-NEXT: jump __hexagon_modsi3
+; CHECK-NEXT: }
+ %1 = srem i32 10, %a
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/Hexagon/llvm.exp10.ll b/llvm/test/CodeGen/Hexagon/llvm.exp10.ll
index b5fcc4151225a..cd94d328f1fee 100644
--- a/llvm/test/CodeGen/Hexagon/llvm.exp10.ll
+++ b/llvm/test/CodeGen/Hexagon/llvm.exp10.ll
@@ -66,11 +66,7 @@ define float @exp10_f32(float %x) #0 {
; CHECK-LABEL: exp10_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: {
-; CHECK-NEXT: call exp10f
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump exp10f
; CHECK-NEXT: }
%r = call float @llvm.exp10.f32(float %x)
ret float %r
@@ -103,11 +99,7 @@ define double @exp10_f64(double %x) #0 {
; CHECK-LABEL: exp10_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: {
-; CHECK-NEXT: call exp10
-; CHECK-NEXT: allocframe(r29,#0):raw
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r31:30 = dealloc_return(r30):raw
+; CHECK-NEXT: jump exp10
; CHECK-NEXT: }
%r = call double @llvm.exp10.f64(double %x)
ret double %r
Prior to this patch, libcalls inserted by the SelectionDAG legalizer could never be tail called. The eligibility of libcalls for tail calling is partly determined by checking `TargetLowering::isInTailCallPosition` and comparing the return type of the libcall and the caller. `isInTailCallPosition` in turn calls `TargetLowering::isUsedByReturnOnly` (which always returns false if not implemented by the target).
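As a quick illustration of the effect, here is a hand-written sketch in the spirit of the libcall_tail.ll test above, with hypothetical function names; the CHECK lines show the codegen expected with this patch applied, not autogenerated output:

; RUN: llc -march=hexagon < %s | FileCheck %s

; The sdiv lowers to a __hexagon_divsi3 libcall whose only user is the
; return, so isUsedByReturnOnly succeeds and the libcall is tail called.
define i32 @div_in_return_position(i32 %a, i32 %b) nounwind {
; CHECK: jump __hexagon_divsi3
  %q = sdiv i32 %a, %b
  ret i32 %q
}

; Here the libcall result also feeds an add, so the hook returns false and
; an ordinary call (with a frame) is emitted instead.
define i32 @div_used_beyond_return(i32 %a, i32 %b) nounwind {
; CHECK: call __hexagon_divsi3
  %q = sdiv i32 %a, %b
  %r = add i32 %q, 1
  ret i32 %r
}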